Analyze selection data using soluble Ephrin-B2 or -B3¶
In [1]:
# this cell is tagged as parameters for `papermill` parameterization
# Every name below is a placeholder set to None; the "# Parameters" cell that
# follows assigns the concrete paths used for a run.
#input configs
# altair_config: Python file whose source is exec'd to set up the Altair theme
altair_config = None
# nipah_config: YAML file loaded into the `config` dict (filter cutoffs)
nipah_config = None
#input files
entropy_file = None
# cell-entry (func_scores_*) and receptor-binding (binding_*) CSVs for
# EFNB2 (E2) and EFNB3 (E3), each read with pd.read_csv
func_scores_E2_file = None
binding_E2_file = None
func_scores_E3_file = None
binding_E3_file = None
#output files
# CSV destinations for the filtered per-receptor binding tables
filtered_E2_binding_data = None
filtered_E3_binding_data = None
#output images
# NOTE(review): none of these plot paths is written in the part of the notebook
# visible here; presumably they are used by later cells — confirm.
entry_binding_combined_corr_plot = None
entry_binding_combined_corr_plot_agg = None
E2_binding_heatmap = None
E3_binding_heatmap = None
E2_E3_correlation = None
E2_E3_correlation_site = None
combined_E2_E3_site_corr = None
binding_by_site_plot = None
In [2]:
# Parameters
# Concrete values for the names declared as None in the preceding cell.
# NOTE(review): this cell looks like it was injected by papermill for this run;
# if so, edits made here will be overwritten on the next execution — confirm.
func_scores_E2_file = "results/func_effects/averages/CHO_EFNB2_low_func_effects.csv"
binding_E2_file = "results/receptor_affinity/averages/EFNB2_monomeric_mut_effect.csv"
func_scores_E3_file = "results/func_effects/averages/CHO_EFNB3_low_func_effects.csv"
binding_E3_file = "results/receptor_affinity/averages/EFNB3_dimeric_mut_effect.csv"
filtered_E2_binding_data = "results/filtered_data/E2_binding_filtered.csv"
filtered_E3_binding_data = "results/filtered_data/E3_binding_filtered.csv"
entry_binding_combined_corr_plot = (
    "results/images/entry_binding_combined_corr_plot.html"
)
entry_binding_combined_corr_plot_agg = (
    "results/images/entry_binding_combined_corr_plot_agg.html"
)
E2_binding_heatmap = "results/images/E2_binding_heatmap.html"
E3_binding_heatmap = "results/images/E3_binding_heatmap.html"
nipah_config = "nipah_config.yaml"
altair_config = "data/custom_analyses_data/theme.py"
E2_E3_correlation = "results/images/E2_E3_correlation.html"
E2_E3_correlation_site = "results/images/E2_E3_correlation_site.html"
combined_E2_E3_site_corr = "results/images/combined_E2_E3_site_corr.html"
entropy_file = "results/entropy/entropy.csv"
binding_by_site_plot = "results/images/binding_by_site_plot.html"
In [3]:
# Report how the notebook was launched: the parameter is still None only when
# no value was assigned to it after the parameters cell.
print('this is being run manually' if binding_by_site_plot is None else 'papermill!')
papermill!
In [4]:
import math
import os
import re
import altair as alt
import numpy as np
import pandas as pd
import scipy.stats
import yaml
In [5]:
# allow more rows for Altair
_ = alt.data_transformers.disable_max_rows()

# Every path used by this notebook is relative to the project root, so make
# sure the kernel is running from there.
PROJECT_DIR = "/fh/fast/bloom_j/computational_notebooks/blarsen/2023/Nipah_Malaysia_RBP_DMS/"

# os.getcwd() never reports a trailing slash, so compare normalised paths;
# a raw string comparison against PROJECT_DIR could never succeed.
if os.path.normpath(os.getcwd()) == os.path.normpath(PROJECT_DIR):
    print("Already in correct directory")
else:
    os.chdir(PROJECT_DIR)
    print("Setup in correct directory")
Setup in correct directory
In [6]:
##hard-coded paths, for running this notebook manually instead of through snakemake
#
#altair_config = "data/custom_analyses_data/theme.py"
#nipah_config = "nipah_config.yaml"
#
##input files
#func_scores_E2_file = "results/func_effects/averages/CHO_EFNB2_low_func_effects.csv"
#binding_E2_file = "results/receptor_affinity/averages/EFNB2_monomeric_mut_effect.csv"
#func_scores_E3_file = "results/func_effects/averages/CHO_EFNB3_low_func_effects.csv"
#binding_E3_file = "results/receptor_affinity/averages/EFNB3_dimeric_mut_effect.csv"
Run config files to setup altair theme and config variables¶
In [7]:
# Optionally apply the Altair theme: the file's source is executed in this
# notebook's namespace, so whatever it defines or registers takes effect here.
# NOTE(review): exec runs arbitrary code read from `altair_config`; keep that
# path pointing at a trusted file from this project.
if altair_config:
    with open(altair_config, 'r') as file:
        exec(file.read())
# Load the analysis settings; `config` supplies the filter cutoffs used below.
with open(nipah_config) as f:
    config = yaml.safe_load(f)
Make the E2/E3 dataframes, filter separately, then merge¶
In [8]:
# Read the binding and cell-entry CSVs for EFNB2 (e2, e2_func) and EFNB3
# (e3, e3_func); files are read in the order they are listed.
e2, e2_func, e3, e3_func = (
    pd.read_csv(csv_path)
    for csv_path in (
        binding_E2_file,
        func_scores_E2_file,
        binding_E3_file,
        func_scores_E3_file,
    )
)
def merge_func_binding_dfs(func,binding,name):
    """Merge cell-entry and binding effects for one receptor, show QC plots, and filter.

    The two tables are outer-merged on ``site``/``mutant``/``wildtype``. Columns
    present in both inputs get the suffix ``_affinity`` (from ``binding``) or
    ``_cell_entry`` (from ``func``), and the ``Ephrin binding_*`` columns are
    renamed to ``binding_*``. Four scatter plots are displayed as a side
    effect, in this order: cell entry vs. binding before and after filtering,
    then binding std vs. times seen before and after filtering.

    Parameters
    ----------
    func : pandas.DataFrame
        Cell-entry effects, at most one row per (site, wildtype, mutant).
    binding : pandas.DataFrame
        Binding effects, at most one row per (site, wildtype, mutant).
    name : str
        Receptor label used in the plot axis titles.

    Returns
    -------
    tuple of pandas.DataFrame
        ``(df_post_filter, df_low_effect_filter)``. The first holds mutations
        that pass both the cell-entry and the binding cutoffs. The second holds
        mutations that pass the cell-entry quality cutoffs but whose ``effect``
        is below ``config['min_func_effect_for_binding']``.

    Raises
    ------
    pandas.errors.MergeError
        If the merge keys are not unique in either input
        (``validate='one_to_one'``).

    Notes
    -----
    Cutoffs are read from the notebook-level ``config`` dict, using the keys
    ``min_func_effect_for_binding``, ``func_times_seen_cutoff``,
    ``func_std_cutoff``, ``min_times_seen_binding``, ``max_binding_std`` and
    ``frac_models``. Comparisons against NaN are False, so rows left without a
    measurement by the outer merge drop out of both returned frames.
    """
    merged = pd.merge(
        binding,
        func,
        on=['site','mutant','wildtype'],
        suffixes=['_affinity','_cell_entry'],
        validate='one_to_one',
        how='outer'
    )
    df = merged.rename(columns={
        'Ephrin binding_mean':'binding_mean',
        'Ephrin binding_std':'binding_std',
        'Ephrin binding_median':'binding_median',
    })

    # Cell-entry quality mask shared by both returned frames: no stop codons
    # ('*') or gaps ('-'), enough observations, and a low enough std.
    # NOTE(review): site 603 is excluded unconditionally; the reason is not
    # recorded here — confirm and consider moving it to the config.
    entry_quality = (
        (df['mutant'] != '*') &
        (df['mutant'] != '-') &
        (df['site'] != 603) &
        (df['times_seen_cell_entry'] >= config['func_times_seen_cutoff']) &
        (df['effect_std'] <= config['func_std_cutoff'])
    )
    tolerated = df['effect'] >= config['min_func_effect_for_binding']
    poorly_tolerated = df['effect'] < config['min_func_effect_for_binding']

    # Only the cell-entry cutoffs are applied at this stage
    df_pre_filter = df[entry_quality & tolerated]

    # Now filter on the binding measurements
    df_post_filter = df_pre_filter[
        (df_pre_filter['times_seen_affinity'] >= config['min_times_seen_binding']) &
        (df_pre_filter['binding_std'] <= config['max_binding_std']) &
        (df_pre_filter['frac_models'] >= config['frac_models'])
    ]

    def plot_affinity_corr(data):
        """Display a scatter of cell-entry effect against median binding."""
        chart = alt.Chart(data).mark_point(size=30, color='black', opacity=0.2, filled=True).encode(
            x=alt.X('effect', title=f'{name} Cell Entry', axis=alt.Axis(grid=True)),
            y=alt.Y('binding_median', title=f'{name} Binding', axis=alt.Axis(grid=True)),
            tooltip=['site', 'wildtype', 'mutant'],
        ).properties(
            width=alt.Step(10),
            height=alt.Step(10),
        )
        # display() renders the chart and returns None, so there is nothing
        # useful for the caller to keep.
        return chart.display()

    def plot_times_seen_std(data):
        """Display a scatter of binding std against times seen (log x-axis)."""
        chart = alt.Chart(data).mark_circle(size=20,color='black',opacity=0.2).encode(
            x=alt.X('times_seen_affinity',axis=alt.Axis(grid=True),title=f'Times Seen for {name}',scale=alt.Scale(type='log')),
            y=alt.Y('binding_std',title='Binding Std',axis=alt.Axis(grid=True)),
            tooltip=['site','times_seen_affinity','effect_std','effect']
        ).properties(
            height=alt.Step(10),
            width=alt.Step(10)
        )
        return chart.display()

    # Before/after views of each QC plot
    plot_affinity_corr(df)
    plot_affinity_corr(df_post_filter)
    plot_times_seen_std(df)
    plot_times_seen_std(df_post_filter)

    # For pulling out low effect mutants for heatmaps later
    df_low_effect_filter = df[entry_quality & poorly_tolerated]

    return df_post_filter,df_low_effect_filter
# Each call yields (mutations passing every filter, mutations with low cell entry)
df_E2_filter, df_E2_filter_missing = merge_func_binding_dfs(
    func=e2_func, binding=e2, name='EFNB2'
)
df_E3_filter, df_E3_filter_missing = merge_func_binding_dfs(
    func=e3_func, binding=e3, name='EFNB3'
)
def plot_corr_binding_entry(df,name):
    """Build (without displaying) a scatter of cell entry vs. median binding.

    Parameters
    ----------
    df : pandas.DataFrame
        Table providing the ``effect`` and ``binding_median`` columns plus the
        tooltip fields listed below.
    name : str
        Receptor label used in both axis titles.

    Returns
    -------
    alt.Chart
        The configured chart; the caller decides whether to display or save it.
    """
    entry_axis = alt.X('effect', title=f'{name} Cell Entry', axis=alt.Axis(grid=True))
    binding_axis = alt.Y('binding_median', title=f'{name} Binding', axis=alt.Axis(grid=True))
    hover_fields = ['site', 'wildtype', 'mutant','binding_median','times_seen_affinity','effect']

    points = alt.Chart(df).mark_point(size=30, color='black', opacity=0.2, filled=True)
    encoded = points.encode(x=entry_axis, y=binding_axis, tooltip=hover_fields)
    return encoded.properties(width=alt.Step(10), height=alt.Step(10))
#E2_binding_entry_tmp = plot_corr_binding_entry(df_E2_filter,'EFNB2')
#E2_binding_entry_tmp.display()
#E2_binding_entry_tmp.save(E2_binding_entry)
#E3_binding_entry_tmp = plot_corr_binding_entry(df_E3_filter,'EFNB3')
#E3_binding_entry_tmp.display()
#E3_binding_entry_tmp.save(E3_binding_entry)
# Write the filtered per-receptor tables to disk for crystal structure mapping
df_E2_filter.to_csv(filtered_E2_binding_data, index=False)
df_E3_filter.to_csv(filtered_E3_binding_data, index=False)

# With both tables filtered, combine EFNB2 and EFNB3; column names shared by
# the two tables get an _E2 / _E3 suffix
df_affinity_filter_merge = df_E2_filter.merge(
    df_E3_filter,
    how='outer',
    on=['site','wildtype','mutant'],
    suffixes=['_E2','_E3'],
)

# Absolute EFNB2-vs-EFNB3 difference in cell entry and in mean binding
df_affinity_filter_merge['func_effect_diff'] = (
    df_affinity_filter_merge['effect_E2']
    .sub(df_affinity_filter_merge['effect_E3'])
    .abs()
)
df_affinity_filter_merge['binding_effect_diff'] = (
    df_affinity_filter_merge['binding_mean_E2']
    .sub(df_affinity_filter_merge['binding_mean_E3'])
    .abs()
)

# Summary statistics for the binding std and median columns of both receptors
binding_stat_columns = df_affinity_filter_merge[
    ['binding_std_E2','binding_std_E3','binding_median_E2','binding_median_E3']
]
display(binding_stat_columns.describe())